Input BLAST data

# Load the two BLAST hit tables: the hits against the assemblies from this
# project and the hits stored in "cbdas_ncbi.csv".
blast <- read.csv(file = "cannsyn_blast.csv")
tmp <- read.csv(file = "cbdas_ncbi.csv")

# Build the CBDAS hit table in one pass:
#   1. keep the rows of `blast` whose query is accession AB292682.1,
#   2. strip the ".chr7" text from the subject names,
#   3. append the rows of `tmp`,
#   4. keep only hits with a percent identity of at least 98.
cbdas <- local({
  hits <- blast[blast$qseqid == "AB292682.1", ]
  hits$sseqid <- sub(pattern = "\\.chr7", replacement = "", x = hits$sseqid)
  hits <- rbind(hits, tmp)
  hits[hits$pident >= 98, ]
})

BUSCO input

# Read the gzip-compressed BUSCO table for chromosome 7.
busc <- read.csv(file = "BUSCOchrom7.csv.gz")

Manage public assemblies

# Drop every column whose name matches one of the four chr7 patterns below
# (the CBDRX, ABAC, FIN and PK columns).
busc <- busc[, !grepl("CBDRX.CBDRX.chr7|ABAC.ABAC.chr7|FIN.FIN.chr7|PK.PK.chr7", colnames(busc))]

# Replace the long column names listed here with short display names; any
# column that is not in the lookup table keeps its current name.
colnames(busc) <- local({
  short_names <- c(
    "CBDRx.NC_044378.1" = "CBDRx",
    "Abacus.CM046076.1" = "Abacus",
    "Finola.CM011610.1" = "Finola",
    "Purple_Kush.CM010797.2" = "Purple Kush",
    "Cannbio.2.CM028020.1" = "Cannbio-2",
    "jl_Kyirong.CM022973.1" = "jl_Kyirong"
  )
  current <- colnames(busc)
  has_short_name <- current %in% names(short_names)
  current[has_short_name] <- unname(short_names[current[has_short_name]])
  current
})

The BUSCOplot package is available on GitHub and is installed below.

# Install BUSCOplot from GitHub only when it is not already installed, so that
# re-running the analysis does not trigger a network install every time.
if (!requireNamespace("BUSCOplot", quietly = TRUE)) {
  devtools::install_github(repo = "knausb/BUSCOplot")
}
library(BUSCOplot)
library(ggplot2)

# Keep only the columns (samples) that are named as a subject in the CBDAS hit
# table. drop = FALSE keeps `busc` a data frame even if one column remains.
busc <- busc[, colnames(busc) %in% cbdas$sseqid, drop = FALSE]

# Remove the JL_Mother hits from the marker table. This happens after the
# column selection above, so a matching column, if present, stays in `busc`.
cbdas <- cbdas[grep("JL_Mother", cbdas$sseqid, invert = TRUE), ]

# Encode each hit's sample as a factor whose levels follow the column order of
# `busc`; as.numeric(myx) is then the column position of that sample.
myx <- factor(cbdas$sseqid, levels = colnames(busc))

# Base line map of the BUSCO table.
p <- gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2)
p

# Overlay the CBDAS hit start positions: a connecting line plus filled
# triangles (shape 24), both in the same blue.
p <- p + annotate( geom = "line", x = as.numeric(myx), y = cbdas$sstart, linewidth = 1.2, color = "#1E90FF")
p <- p + annotate( geom = "point", x = myx, y = cbdas$sstart, shape = 24, bg = "#1E90FF", size = 4)
p

# Uncomment to write the figure to disk.
# ggsave(filename = "ggbusco_lineplot_chrom7_public.png",
#        device = "png", width = 6.5, height = 4.5, units = "in", dpi = 300)

Sort

# Put the samples in alphabetical order.
busc <- busc[, order(colnames(busc)), drop = FALSE]

# Move the public assemblies to the right-hand end of the table, in the fixed
# order CBDRx, Abacus, Finola. Columns are selected by name with drop = FALSE
# so that sample names survive even when only one column is left on either
# side, and a public assembly that is absent from `busc` is skipped instead of
# aborting with "undefined columns selected".
public_samples <- intersect(c("CBDRx", "Abacus", "Finola"), colnames(busc))
other_samples <- grep("CBDRx|Abacus|Finola", colnames(busc), invert = TRUE, value = TRUE)
busc <- busc[, c(other_samples, public_samples), drop = FALSE]

# Recompute the marker positions for the new column order and redraw the base
# line map.
myx <- factor(cbdas$sseqid, levels = colnames(busc))
p <- gg_line_map(busc, check_table = FALSE, size = 1.2, lalpha = 0.2)

Final graphic

# Add the CBDAS hit start positions to the sorted line map: a blue connecting
# line across samples, then a filled blue triangle (shape 24) at each hit.
p <- p +
  annotate(
    geom = "line",
    x = as.numeric(myx),
    y = cbdas$sstart,
    linewidth = 1.2,
    color = "#1E90FF"
  ) +
  annotate(
    geom = "point",
    x = myx,
    y = cbdas$sstart,
    shape = 24,
    bg = "#1E90FF",
    size = 4
  )
p

# Uncomment to save the final figure as a PNG.
# ggsave(filename = "ggbusco_lineplot_chrom7_public.png",
#        device = "png", width = 6.5, height = 4.5, units = "in", dpi = 300)

Figure X. BUSCO genes for Chromosome 7. Lines connect identical BUSCO genes among neighboring chromosomes (samples). Changes in the elevation and spacing among BUSCO genes reflect structural variation within samples, while the lines draw attention to variability between each sample and its immediate neighbor. Blue triangles indicate high stringency matches to CBDAS (BLASTN query of NCBI accession “AB292682.1”, percent identity >= 98%, number of gaps = 0). The samples ‘CBDRx’, ‘Abacus’, and ‘Finola’ were obtained from NCBI (https://www.ncbi.nlm.nih.gov/); the remaining samples were assembled in the present work.